from lxml import etree
# tree = etree.parse("b.html")
# result = tree.xpath("/html/li[1]/*/a/text()")  # "*" matches any tag
# result = tree.xpath("/html/li[@class='xxx']/*/a/text()")  # text of the <a> tags under the li whose class is 'xxx'
# result = tree.xpath("/html/li[@class='xxx']/*/a/@href")  # href value of those <a> tags

'''
Scrape product/service listings from zbj.com (Zhubajie).
'''

import requests
# Search-results page queried by this script (keyword "saas").
url = "https://xian.zbj.com/search/f/?kw=saas"
# Browser-like User-Agent header sent with the request.
heards = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36"
}

# Without a timeout, requests waits indefinitely on a stalled connection.
resp = requests.get(url=url, headers=heards, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as search results.
resp.raise_for_status()
# Force UTF-8 decoding of the body rather than relying on the guessed charset.
resp.encoding = "utf-8"
t = resp.text
# Parse the downloaded HTML text into an lxml element tree.
html = etree.HTML(t)
# One <div> per search-result card.
# NOTE(review): this absolute, position-based XPath matches nothing if the
# page layout shifts, in which case the loop below prints nothing.
divs = html.xpath("/html/body/div[6]/div/div/div[2]/div[5]/div[1]/div")

# Only the first seven result cards are printed.
MAX_ITEMS = 7
for div in divs[:MAX_ITEMS]:
    # NOTE(review): price is extracted but never printed; kept so the name
    # stays available, confirm whether it should be part of the output.
    price = div.xpath("./div/div/a[2]/div[2]/div[1]/span[1]/text()")
    # The title comes back as several text fragments that are re-joined with
    # the search keyword; presumably the keyword sits in a child tag that
    # p/text() skips (TODO confirm against the live markup).
    title = "saas".join(div.xpath("./div/div/a[2]/div[2]/div[2]/p/text()"))
    # List of the matching @title attribute values (printed as a list).
    location = div.xpath("./div/div/a[1]/div[1]/div/span/@title")
    # The company name is read from the second text node of the <p>. Cards
    # with fewer than two text nodes used to raise IndexError and abort the
    # whole run; fall back to an empty string instead.
    company_texts = div.xpath("./div/div/a[1]/div[1]/p/text()")
    company = company_texts[1].strip() if len(company_texts) > 1 else ""
    print(title)
    print(location)
    print(company)